#!/usr/bin/env perl
use warnings;
use strict;

# 
# Keep only the records that are in the select file.
# This script is really just intended for extracting the probe set from the
# training set.
#
# Usage:
# perl src/tuple_select.pl select <input >output
#
# Where input, select and output all have format
# movie,user[,rating][,date]
# movie,user[,rating][,date]
# movie,user[,rating][,date]
#
# Note that select is opened using perl open(), so it can be a command, as in
# perl src/tuple_select.pl "perl src/block_to_tuple.pl <blocks |" <input >output
#
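# Only the first two numeric fields of each line are used as the matching key,
# so the script can also be used in the inverse sense (user,movie[,...] order),
# provided that input and select both use the same field order.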
#

# Exactly one command-line argument is expected: the select file (or a
# command string that open() can run).
die "need select file as argument" unless @ARGV == 1;
my $select_file = $ARGV[0];

# Build the set of "movie,user" keys to keep, one key per select-file line.
# A hash of string keys is not very memory-efficient, but it works.
#
# The two-argument form of open() is deliberate: it lets $select_file be a
# command ending in "|" as well as a plain file name (see the usage notes in
# the header comment). Because of that, $select_file must be trusted input.
open(my $select_fh, $select_file)
  or die "cannot open select file ($select_file): $!";
my %select_hash;
# Read line by line rather than slurping the whole file into a list first.
while (<$select_fh>) {
  /^(\d+),(\d+).*$/ or die "unexpected input line format ($_)";
  # Only the presence of the key matters; 0 is stored so the value is defined.
  $select_hash{"$1,$2"} = 0;
}
# For a piped command, close() also fails when the command exits non-zero
# (with $! set to 0), in which case the select set may be incomplete. Warn
# instead of staying silent, but keep going with what was read.
close($select_fh)
  or warn "problem closing select file ($select_file): "
    . ($! || "exit status $?");

# Stream tuples from standard input and echo only those whose leading
# "movie,user" pair is present in %select_hash. Lines are printed unchanged.
while (<STDIN>) {
  unless (/^(\d+),(\d+).*$/) {
    die "unexpected tuple line format ($_)";
  }
  my $tuple_key = join(',', $1, $2);
  next unless defined $select_hash{$tuple_key};
  print;
}

# Copyright (c) 2009 John Lees-Miller
# 
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

